# -*- coding: utf-8 -*-
"""
Created on Wed Mar  2 20:01:54 2016

@author: ppradeep
"""

#%%
import csv
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties 

path = 'W:/Projects/HinderedPhenols-ReadAcross/'

##
## Plots for read-across prediction results
##

# Literature-data-source summary, read in a single pass.
# Both tables map the value in column 1 (used further down as the number of
# analogs) to [column 3, column 4], which the plots below use as
# [accuracy, balanced accuracy].
#   metrics_p0 <- 'PubChem' rows whose column 2 is '1'
#   metrics_p1 <- 'PubChem' rows whose column 2 is '4'
# NOTE(review): column 2 looks like the minimum number of data sources
# required (the baseline comment below says "Data Source >=4") -- confirm.
metrics_p0 = {}
metrics_p1 = {}
tables_by_level = {'1': metrics_p0, '4': metrics_p1}
with open(path+'Number/RA-HP-LitDataSources-Summary-Dist.csv','r') as f1:
    readCSV1 = csv.reader(f1, delimiter=',')
    next(readCSV1)  # skip the header row (works on Python 2.6+ and 3)
    for row in readCSV1:
        if row[0] != 'PubChem':
            continue
        # Rows with any other level are skipped without converting their
        # values, exactly as the two separate passes did.
        target = tables_by_level.get(row[2])
        if target is not None:
            target[int(row[1])] = [float(row[3]), float(row[4])]
        
# Global-filtering summary: maps column 1 (used below as the number of
# analogs) to [column 2, column 3], which the plots below use as
# [accuracy, balanced accuracy]. Only 'PubChem' rows are kept.
metrics_p2 = {}
# The context manager closes the file even if a row fails to parse.
with open(path+'Number/RA-HP-Global-Summary-Dist.csv','r') as f3:
    readCSV3 = csv.reader(f3, delimiter=',')
    next(readCSV3)  # skip the header row (works on Python 2.6+ and 3)
    for row in readCSV3:
        if row[0] == 'PubChem':
            metrics_p2[int(row[1])] = [float(row[2]), float(row[3])]

# Local-filtering summary: maps column 1 (used below as the number of
# analogs) to [column 2, column 3], which the plots below use as
# [accuracy, balanced accuracy]. Only 'PubChem' rows are kept.
metrics_p3 = {}
# The context manager closes the file even if a row fails to parse.
with open(path+'Number/RA-HP-Local-Summary-Dist.csv','r') as f4:
    readCSV4 = csv.reader(f4, delimiter=',')
    next(readCSV4)  # skip the header row (works on Python 2.6+ and 3)
    for row in readCSV4:
        if row[0] == 'PubChem':
            metrics_p3[int(row[1])] = [float(row[2]), float(row[3])]
 
#%%    
# Accuracy: grouped bar chart with one group per analog count and one bar
# per filtering level, plus a horizontal baseline.
x = range(1,11) # x = no. of analogs
ind = np.arange(len(x))
fig = plt.figure(figsize=(12,8), dpi = 100)
ax = fig.add_subplot(111)
# One bar() call per filtering level draws that level's bar for every analog
# count. The series are drawn in the same left-to-right order as before, so
# where neighbouring bars overlap the stacking is unchanged.
rects1 = ax.bar(ind-0.30, [metrics_p0[i][0] for i in x], 0.2, color='blue', align='center')
rects2 = ax.bar(ind-0.15, [metrics_p1[i][0] for i in x], 0.2, color='green', align='center')
rects3 = ax.bar(ind, [metrics_p2[i][0] for i in x], 0.2, color='red', align='center')
rects4 = ax.bar(ind+.15, [metrics_p3[i][0] for i in x], 0.2, color='gray', align='center')

# Straight line for Accuracy at 1 analog and Data Source >=4.
# NOTE(review): the +0.05 offset presumably keeps the line from sitting
# exactly on the bar top -- confirm.
line = ax.axhline(y = metrics_p1[1][0]+0.05, c = 'green')
ax.set_xlim(-.5,len(x)-.5)
ax.set_ylim(60,100)
ax.set_ylabel('Accuracy (%)', fontsize = 24)
ax.set_xlabel('Number of Analogs', fontsize = 24)
ax.set_xticks(ind)
ax.set_xticklabels(x)
ax.tick_params(axis='x', labelsize=18)
ax.set_yticks(range(60,105,5))
ax.xaxis.labelpad = 15
ax.yaxis.labelpad = 15
# The former bare plt.axes() call was dropped: it configures nothing here and,
# depending on the matplotlib version, adds a new empty axes over the chart.
fontP = FontProperties()
fontP.set_size(16)
ax.legend( (rects1[0], rects2[0], line, rects3[0], rects4[0]), ('No Filtering', 'Data Confidence Filtering', 'Baseline Performance', 'Global Filtering', 'Local Filtering'), prop = fontP, loc='upper right')
# Save before show(): once show() returns the figure may already have been
# released, in which case savefig() would write an empty image.
fig.savefig(path+'Number/RA-HP-Acc-Dist.png', bbox_inches='tight')
plt.show()


# Balanced accuracy: same grouped bar chart as the accuracy figure, built from
# the second value of every metrics entry. Reuses x and ind defined above.
fig = plt.figure(figsize=(12,8), dpi = 100)
ax = fig.add_subplot(111)
# One bar() call per filtering level; drawing order (and therefore overlap
# stacking inside each group) matches the original per-bar loop.
rects1 = ax.bar(ind-0.30, [metrics_p0[i][1] for i in x], 0.2, color='blue', align='center')
rects2 = ax.bar(ind-0.15, [metrics_p1[i][1] for i in x], 0.2, color='green', align='center')
rects3 = ax.bar(ind, [metrics_p2[i][1] for i in x], 0.2, color='red', align='center')
rects4 = ax.bar(ind+.15, [metrics_p3[i][1] for i in x], 0.2, color='gray', align='center')

# Straight line for BA at 1 analog and Data Source >=4.
# The handle is stored so the legend refers to THIS figure's baseline rather
# than to whatever `line` was left over from an earlier figure.
line = ax.axhline(y = metrics_p1[1][1]+0.05, c = 'green')
ax.set_xlim(-.5,len(x)-.5)
ax.set_ylim(60,100)
ax.set_ylabel('Balanced Accuracy (%)', fontsize = 24)
ax.set_xlabel('Number of Analogs', fontsize = 24)
ax.set_xticks(ind)
ax.set_xticklabels(x)
ax.tick_params(axis='x', labelsize=18)
ax.set_yticks(range(60,105,5))
ax.xaxis.labelpad = 15
ax.yaxis.labelpad = 15
# The former bare plt.axes() call was dropped: it configures nothing here and,
# depending on the matplotlib version, adds a new empty axes over the chart.

fontP = FontProperties()
fontP.set_size(16)
ax.legend( (rects1[0], rects2[0], line, rects3[0], rects4[0]), ('No Filtering', 'Data Confidence Filtering', 'Baseline Performance', 'Global Filtering', 'Local Filtering'), prop = fontP, loc='upper right')
# Save before show(): once show() returns the figure may already have been
# released, in which case savefig() would write an empty image.
fig.savefig(path+'Number/RA-HP-BA-Dist.png', bbox_inches='tight')
plt.show()

#%%
##
## Plots for bootstrapping confidence intervals
##
import numpy as np
import scipy as sp
import scipy.stats
import csv

## Function
def mean_confidence_interval(data, confidence=0.95):
    """Return the mean of *data* and a two-sided Student-t confidence interval.

    Parameters
    ----------
    data : sequence of numbers
        The sample values.
    confidence : float, optional
        Confidence level of the interval (default 0.95).

    Returns
    -------
    tuple
        ``(mean, lower, upper)`` where ``lower = mean - h`` and
        ``upper = mean + h``; ``h`` is the standard error of the mean times
        the t quantile at ``(1 + confidence) / 2`` with ``n - 1`` degrees of
        freedom.

    Notes
    -----
    With a single observation the degrees of freedom are zero, so the
    interval is not meaningful.
    """
    a = np.asarray(data, dtype=float)
    n = len(a)
    m = np.mean(a)
    se = scipy.stats.sem(a)
    # Use the public ppf(); the private _ppf() skips scipy's argument checks.
    h = se * scipy.stats.t.ppf((1 + confidence) / 2., n - 1)
    return m, m - h, m + h


# Bootstrap results for the literature/data-confidence filter.
# metrics maps int(column 2) -- looked up further down by number of analogs --
# to a list of [column 1, column 4, column 5] records, one per 'PubChem' row.
# NOTE(review): column 1 is presumably the bootstrap replicate id, and
# columns 4/5 the accuracy / balanced accuracy -- confirm against the CSV.
metrics = {}
# The context manager closes the file even if a row fails to parse.
with open(path+'Number/Bootstrap-0629/RA-HP-Lit-Summary_Bootstrap.csv','r') as f1:
    readCSV1 = csv.reader(f1, delimiter=',')
    next(readCSV1)  # skip the header row (works on Python 2.6+ and 3)
    for row in readCSV1:
        if row[0] == 'PubChem':
            metrics.setdefault(int(row[2]),[]).append([float(row[1]), float(row[4]), float(row[5])])

# Bootstrap results for global filtering.
# metrics_g maps int(column 2) -- looked up further down by number of
# analogs -- to a list of [column 1, column 3, column 4] records, one per
# 'PubChem' row.
# NOTE(review): column 1 is presumably the bootstrap replicate id, and
# columns 3/4 the accuracy / balanced accuracy -- confirm against the CSV.
metrics_g = {}
# The context manager closes the file even if a row fails to parse.
with open(path+'Number/Bootstrap-0629/RA-HP-Global-Summary_Bootstrap.csv','r') as f1:
    readCSV1 = csv.reader(f1, delimiter=',')
    next(readCSV1)  # skip the header row (works on Python 2.6+ and 3)
    for row in readCSV1:
        if row[0] == 'PubChem':
            metrics_g.setdefault(int(row[2]),[]).append([float(row[1]), float(row[3]), float(row[4])])

# Bootstrap results for local filtering.
# metrics_l maps int(column 2) -- looked up further down by number of
# analogs -- to a list of [column 1, column 3, column 4] records, one per
# 'PubChem' row.
# NOTE(review): column 1 is presumably the bootstrap replicate id, and
# columns 3/4 the accuracy / balanced accuracy -- confirm against the CSV.
metrics_l = {}
# The context manager closes the file even if a row fails to parse.
with open(path+'Number/Bootstrap-0629/RA-HP-Local-Summary_Bootstrap.csv','r') as f1:
    readCSV1 = csv.reader(f1, delimiter=',')
    next(readCSV1)  # skip the header row (works on Python 2.6+ and 3)
    for row in readCSV1:
        if row[0] == 'PubChem':
            metrics_l.setdefault(int(row[2]),[]).append([float(row[1]), float(row[3]), float(row[4])])


import numpy as np
import matplotlib.pyplot as plt

x = range(1,2) # x = no. of analogs
for i in x:
    # One figure per analog count: overlaid 100-bin histograms of the
    # bootstrap accuracy values for the three filtering levels.
    fig = plt.figure(figsize=(10,6), dpi = 100)
    ax = fig.add_subplot(111)

    # Index 1 of every bootstrap record is the value plotted as accuracy.
    y0 = [metric[1] for metric in metrics[i]]
    ax.hist(y0, 100, label = 'Data Confidence Filtering', facecolor='lightgreen', alpha=0.5)

    y1 = [metric_g[1] for metric_g in metrics_g[i]]
    ax.hist(y1, 100, label = 'Global Filtering', facecolor='mediumorchid', alpha=0.5)

    # Previously read index 2 here, which put a different metric for the local
    # filter on an axis labelled "Accuracy"; index 1 matches y0 and y1.
    y2 = [metric_l[1] for metric_l in metrics_l[i]]
    ax.hist(y2, 100, label = 'Local Filtering', facecolor='darkorange', alpha=0.5)

    ax.set_xlim(80, 95)
    ax.set_ylabel('Frequency')
    ax.set_xlabel('Accuracy (%)')
    ax.legend()
    # Saving of this figure is currently disabled:
    #plt.savefig(path+'Number/RA-HP-Bootsrap-Acc.png')


## Confidence intervals of accuracy and balanced accuracy
# The samples are taken straight from the bootstrap tables for the last analog
# count in x, in the order: data confidence, global, local.
# Index 1 of a bootstrap record is the accuracy value; index 2 is used for
# balanced accuracy.
# NOTE(review): index 2 == balanced accuracy is inferred from the column
# order of the summary tables read earlier -- confirm against the CSVs.
# Previously CI_BA was computed from the very same samples as CI_Acc, so both
# lists were always identical.
n_analogs = x[-1]
boot_tables = (metrics, metrics_g, metrics_l)
CI_Acc = [mean_confidence_interval([rec[1] for rec in tbl[n_analogs]], confidence=0.95) for tbl in boot_tables]
CI_BA = [mean_confidence_interval([rec[2] for rec in tbl[n_analogs]], confidence=0.95) for tbl in boot_tables]

## Report the CIs
# Single-argument print(...) with %-formatting gives the same text on
# Python 2 and Python 3.
print("CI of Accuracy:")
print("Data quality: %s" % (CI_Acc[0],))
print("Global filtering: %s" % (CI_Acc[1],))
print("Local filtering: %s" % (CI_Acc[2],))
print("")
print("CI of Balanced Accuracy:")
print("Data quality: %s" % (CI_BA[0],))
print("Global filtering: %s" % (CI_BA[1],))
print("Local filtering: %s" % (CI_BA[2],))

# Horizontal error-bar chart of the confidence intervals: one row per
# filtering level, accuracy and balanced accuracy drawn slightly apart.
y = [.32, .52, .72]  # tick positions, between each accuracy / BA pair
labels = ['Data \n Confidence', 'Global', 'Local']
fig = plt.figure(figsize=(12, 8), dpi = 100)
ax = fig.add_subplot(111)

# mean_confidence_interval returns (mean, lower, upper), so the half-width is
# (upper - lower) / 2. The old unpacking order produced negative xerr values,
# which newer matplotlib versions reject.
ax.errorbar(y = [0.3, 0.5, 0.7], x = [mean for mean, low, high in CI_Acc], xerr = [(high-low)/2 for mean, low, high in CI_Acc], fmt = 'o', ecolor = 'b', markersize=8)
ax.errorbar(y = [0.35, 0.55, 0.75], x = [mean for mean, low, high in CI_BA], xerr = [(high-low)/2 for mean, low, high in CI_BA], fmt = 'rs', ecolor = 'r', markersize=8)

fontP = FontProperties()
fontP.set_size(18)
# Legend entries are matched to the two errorbar() calls by drawing order.
ax.legend(['Accuracy', 'Balanced Accuracy'], prop = fontP, numpoints = 1)
ax.set_ylim(0.25,0.8)

ax.set_yticks(y)
ax.set_yticklabels(labels)
ax.set_xlabel('Percentage (%)', fontsize=24)
ax.set_ylabel('Filtering level', fontsize=24)

ax.tick_params(axis='x', labelsize=18)
ax.tick_params(axis='y', labelsize=16)

ax.xaxis.labelpad = 15
ax.yaxis.labelpad = 15

# Save before show(): once show() returns the figure may already have been
# released, in which case savefig() would write an empty image.
fig.savefig(path+'Number/RA-HP-Bootsrap-CI.png', bbox_inches='tight')
plt.show()
